{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "84d817fc",
   "metadata": {},
   "outputs": [],
   "source": [
    "%run -i \"../util/util_simple_classifier.ipynb\"\n",
    "%run -i \"../util/file_utils.ipynb\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "ec536f46",
   "metadata": {},
   "outputs": [],
   "source": [
    "import openai\n",
    "openai.api_key = OPEN_AI_KEY"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "a410acf8",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[-0.028350897133350372, -0.011136125773191452, -0.0021299426443874836, -0.014453398995101452, -0.012048527598381042, 0.018223850056529045, -0.010247894562780857, -0.01806674897670746, -0.014308380894362926, 0.0007220656843855977, -9.998268797062337e-05, 0.010078707709908485, 0.01148658525198698, -0.014066685922443867, -0.007172317709773779, -0.01833261363208294, 0.038622960448265076, 0.0024985286872833967, 0.034538306295871735, -0.02673153765499592, -0.01109382975846529, -0.0035378197208046913, 0.01887642964720726, -0.023263204842805862, 0.010483548045158386, 0.02091875672340393, -0.00636867992579937, -0.005166244227439165, -0.023263204842805862, -0.003697943175211549, 0.0480007529258728, 0.012471494264900684, -0.036278512328863144, -0.03666522726416588, -0.013124072924256325, -0.024387089535593987, -0.021087944507598877, -0.017184559255838394, -0.0016178501537069678, -0.008713127113878727, 0.001028717029839754, 0.020060736685991287, -0.006785604637116194, 0.004132995381951332, -0.00866478867828846, 0.015528944320976734, -0.006839986424893141, -0.011776619590818882, 0.0037855578120797873, 0.01046542078256607, 0.013812905177474022, 0.0048429761081933975, -0.020749570801854134, -0.01095485407859087, 0.005758398212492466, -0.01658032089471817, -0.02473754808306694, 0.007105851545929909, 0.02142631821334362, -0.014356720261275768, -0.005652656778693199, 0.005610359832644463, -0.018513884395360947, 0.015649791806936264, 0.00814514234662056, 0.0005683623603545129, 0.0040755923837423325, 0.0016163395484909415, 0.02342030592262745, 0.013607463799417019, 0.01806674897670746, 0.004108825698494911, -0.031782977283000946, 0.011577220633625984, 0.016495727002620697, 0.0023353840224444866, -0.028858458623290062, -0.0029018581844866276, 0.009601359255611897, -0.008127016015350819, 0.005842992104589939, -0.019311480224132538, -0.004160186275839806, 0.019009361043572426, -0.0013587826397269964, 0.008858145214617252, -0.0012885398464277387, 0.017269153147935867, 
0.0006004626047797501, -0.035070035606622696, -0.0073294201865792274, -0.00419644033536315, 0.0152872484177351, 0.026417331770062447, -0.007994082756340504, 0.01429629698395729, -0.004761403892189264, -0.015238909982144833, -0.004268948920071125, -0.026659028604626656, -0.005549936089664698, -0.0020483704283833504, -0.02346864528954029, -0.006265959236770868, -0.01630236953496933, -0.012054570019245148, 0.00853789784014225, 0.0170757956802845, 0.014199618250131607, 0.006404933985322714, 0.018441377207636833, 0.009504680521786213, 0.01451382227241993, -0.0641460195183754, -0.016471557319164276, -0.00803637970238924, -0.0007824896019883454, -0.010139131918549538, -0.009595316834747791, -0.013679972849786282, -0.012834037654101849, 0.021365894004702568, 0.032532233744859695, -0.01608484424650669, 0.025692244991660118, 0.014441314153373241, -0.031492941081523895, -0.036882754415273666, -0.008314330130815506, 0.018961021676659584, 0.03925137221813202, -0.0014720774488523602, -0.0015022894367575645, 0.004353542346507311, -0.02164384350180626, 0.01705162599682808, -0.010284149087965488, 0.013704142533242702, -0.010477505624294281, -0.015746470540761948, 0.0009131563128903508, 0.036302682012319565, 0.0025242087431252003, -0.020338688045740128, -0.004096740856766701, 0.0038490029983222485, 0.021305469796061516, 0.009516765363514423, 0.01133552473038435, -0.002604270353913307, 0.020060736685991287, -0.024024546146392822, 0.00217223959043622, -0.013788735494017601, -0.02042328007519245, 0.011655772104859352, 0.004096740856766701, 0.02165592834353447, -6.410598871298134e-05, -0.012568172998726368, 0.004380733240395784, -0.015577283687889576, 0.005247816443443298, -0.018658902496099472, 0.007963870652019978, 0.03678607568144798, 0.00681581674143672, 0.01274944469332695, -0.005296155344694853, 0.016773676499724388, 0.011764534749090672, 0.020797908306121826, -0.009160264395177364, 0.018223850056529045, -0.017474593594670296, 0.030477819964289665, 0.0002830482553690672, 
0.01376456581056118, -0.030695345252752304, -0.0063324254006147385, -0.01004849560558796, 0.03214551880955696, 0.015081807039678097, 0.0029441548977047205, -0.007039385382086039, 0.012006230652332306, 0.013994176872074604, -0.009462383575737476, -0.020773738622665405, 0.00561942346394062, -0.011371779255568981, 0.019311480224132538, -0.018405122682452202, -0.007824895903468132, -0.6821617484092712, -0.020991265773773193, 0.017438339069485664, 0.0002449056482873857, 0.04408528283238411, 0.021257130429148674, 0.00942612998187542, 0.014175448566675186, 0.015891488641500473, 0.009160264395177364, -0.005114883650094271, 0.02779499813914299, 0.019819041714072227, 0.013269090093672276, 0.017196644097566605, -0.01261651236563921, 0.022501863539218903, -0.04418196156620979, -0.022054726257920265, 0.00047999242087826133, -0.021849285811185837, 0.019516922533512115, -0.0068581136874854565, 0.012652765959501266, 0.023819103837013245, 0.01234460435807705, 0.014151278883218765, -0.015335587784647942, -0.0025574418250471354, -0.012531918473541737, -0.0010309829376637936, 0.009553019888699055, -0.010785667225718498, 0.004966845270246267, 0.05094943940639496, 0.004477411508560181, 0.001632956089451909, 0.03586763143539429, 0.011154253035783768, 0.023009423166513443, 0.0008580195135436952, -0.001290050451643765, 0.003997041378170252, 0.012991140596568584, -0.030695345252752304, -0.00517228664830327, 0.017390001565217972, 0.010247894562780857, 0.021063774824142456, 0.006701011210680008, 0.019226888194680214, -0.005531808827072382, 0.023347798734903336, 0.018223850056529045, -0.012205629609525204, -0.016399048268795013, 0.03158961981534958, -0.0033837389200925827, 0.015105976723134518, 0.00414205901324749, -0.010471463203430176, 0.0005525011220015585, -0.0079034473747015, 0.013257005251944065, -0.007450267672538757, 0.009172349236905575, -0.0017583357403054833, -0.00636867992579937, 0.005181350279599428, -0.010646692477166653, 0.0007179115782491863, 0.006978961173444986, 
-0.010229767300188541, 0.0013648250605911016, 0.024544190615415573, -0.0018293338362127542, 0.002407892607152462, -0.014791772700846195, -0.011891424655914307, 0.009021289646625519, -0.017015373334288597, -0.00023754149151500314, -0.0036858583334833384, -0.015093891881406307, 0.04452033340930939, 0.001637487905099988, -0.02264687977731228, 0.001299113966524601, 0.020519958809018135, -0.016640745103359222, 0.02061663754284382, 0.0026480776723474264, 0.028471745550632477, -0.015673961490392685, -0.016773676499724388, -0.0014267595252022147, -0.0041662282310426235, 0.008972950279712677, -0.005262922495603561, -0.013607463799417019, -0.02317861095070839, -0.015408096835017204, 0.0032779970206320286, -0.009879308752715588, -0.00815722718834877, -0.009154221974313259, 0.000952431873884052, -0.0003510251408442855, 0.013051563873887062, -0.02143840305507183, -0.0120001882314682, -0.018574308604002, -0.000899560924153775, -0.021353809162974358, 0.009353620931506157, -0.026054788380861282, 0.029003474861383438, 0.007963870652019978, 0.016266116872429848, -0.03306396305561066, -0.001499268226325512, 0.02545055001974106, 0.0018958001164719462, -0.008846060372889042, -0.002978898584842682, 0.028616761788725853, -0.0018021430587396026, -0.015347672626376152, -0.014876365661621094, -0.016954949125647545, -0.006278044078499079, 0.019565261900424957, 0.002969834953546524, 0.0043958392925560474, 0.012290222570300102, -0.03374071046710014, 0.008018252439796925, -0.00586414011195302, 0.029704393818974495, -0.028640931472182274, -0.033257320523262024, 0.000425988546339795, -0.016991203650832176, 0.00048112537479028106, 0.02473754808306694, -0.017390001565217972, -0.0034683323465287685, -0.0063022137619555, 0.0037190914154052734, -0.010229767300188541, -0.0035770952235907316, 0.015408096835017204, -0.01731749251484871, 0.010078707709908485, 0.01516640093177557, 0.005426066927611828, -0.0004297650302760303, 0.005096756387501955, -0.03405491262674332, 0.0037825366016477346, 
0.023094017058610916, -0.007855108007788658, -0.01134760957211256, 0.0056496355682611465, 0.011172380298376083, -0.006102814804762602, -0.0030544286128133535, -0.009770546108484268, -0.03613349795341492, -0.018513884395360947, -0.0017099966062232852, 0.016423217952251434, 0.018151341006159782, -0.0052931341342628, 0.003812748473137617, 0.011667856946587563, -0.007764472160488367, -0.028906797990202904, 0.00878563616424799, -0.00967990979552269, 0.019976144656538963, 0.0030770874582231045, -0.014900535345077515, -0.00010253181972075254, 0.020205754786729813, 0.01631445437669754, 0.008489559404551983, 0.008719169534742832, 0.001046844176016748, 0.0008791678701527417, -0.013111988082528114, 0.017160389572381973, -0.02418164722621441, 0.0002872023906093091, 0.006009157747030258, 0.005039353854954243, -0.0007265975000336766, -0.002194898435845971, 0.01703954115509987, 0.0362301766872406, 0.007154190447181463, 0.014054601080715656, 0.021800946444272995, -0.014429229311645031, 0.02242935448884964, -0.019686108455061913, -9.74335489445366e-05, -0.015516859479248524, 0.01882809028029442, 0.025643905624747276, 0.010737328790128231, -0.004181334283202887, -0.006006136536598206, -0.017390001565217972, 0.03035697154700756, 0.034610815346241, -0.012991140596568584, 0.007190444972366095, -0.0035166714806109667, 0.00611187843605876, 0.010640650056302547, 0.00682790158316493, 5.131311627337709e-05, -0.008713127113878727, -0.016664912924170494, 0.016193607822060585, 0.004963824059814215, 0.00465566199272871, 0.003350505605340004, -0.003350505605340004, -0.03216968849301338, 0.003900363342836499, 0.0021344744600355625, 0.009830969385802746, -0.022332675755023956, -0.02344447560608387, 0.026973232626914978, -0.02825421839952469, 0.028423406183719635, -0.01120259240269661, 0.010145174339413643, -0.007299208082258701, -0.005858097691088915, 0.01806674897670746, 0.012435239739716053, 0.005510660354048014, 0.010151216760277748, 0.0064774430356919765, -0.016906609758734703, 
0.01197601854801178, -0.009861181490123272, -0.005271986126899719, -0.004800679627805948, 0.007498607039451599, 0.010350615717470646, -0.01653198152780533, 0.014429229311645031, 0.018731411546468735, 0.014562161639332771, 0.03540841117501259, 0.012199587188661098, 0.016423217952251434, 0.0029169642366468906, -0.010235809721052647, 0.0022251103073358536, 7.203662971733138e-05, -0.010241852141916752, 0.0016525938408449292, -0.0003034413093701005, 0.04258676990866661, -0.023831188678741455, -0.024350835010409355, -0.005477427039295435, -0.022767728194594383, 0.005719122942537069, -0.022731473669409752, 0.001708486001007259, 0.002418466843664646, 0.005232710391283035, 0.00290487939491868, -0.011945806443691254, -0.03395823761820793, 0.029003474861383438, 0.03154128044843674, -0.01196393370628357, -0.01629028469324112, 0.01519057061523199, -0.004477411508560181, -0.010042453184723854, 0.024157477542757988, 0.01301530934870243, 0.031251244246959686, 0.012459409423172474, -0.00828411802649498, -0.005927585531026125, 0.011734322644770145, 0.014344635419547558, -0.005755377002060413, 0.006954791955649853, 0.010628565214574337, 0.007812811061739922, -0.0005883778212592006, -0.008211608976125717, -0.010030368342995644, 0.022078895941376686, -0.006471400614827871, -0.004697958938777447, -0.0005691176629625261, -0.0002722852223087102, -0.03236304596066475, 0.020532043650746346, -0.002303661545738578, 0.0018988213269039989, -0.015250993892550468, -0.006767477840185165, -0.01300322450697422, -0.017897561192512512, -0.015528944320976734, 0.024616699665784836, -0.005546914879232645, -0.008755424059927464, -0.03702776879072189, -0.03386155888438225, -0.0032931030727922916, 0.10412247478961945, 0.01516640093177557, 0.009250900708138943, 0.014139194041490555, -0.0048429761081933975, -0.013800820335745811, -0.02241726964712143, -0.004504602402448654, -0.016713252291083336, 0.0009554530261084437, 0.004271970130503178, -0.019794872030615807, 0.013208665885031223, 0.02270730398595333, 
0.0039758929051458836, -0.0037432610988616943, -0.013257005251944065, -0.02624814584851265, 0.0036919007543474436, -0.008972950279712677, -0.005413982085883617, -0.004694937728345394, -0.004852039739489555, 0.02827838808298111, 0.027649980038404465, -0.004528772085905075, 0.00151361885946244, -8.898364467313513e-05, 0.010302276350557804, -0.027142418548464775, 0.013909583911299706, 0.014090855605900288, -0.016205692663788795, -0.017728375270962715, -0.012531918473541737, -0.00877355132251978, 0.018223850056529045, 0.00802429486066103, 0.013281174935400486, -0.0023399158380925655, 0.018731411546468735, 0.01134760957211256, 0.016109013929963112, -0.021535079926252365, 0.030526157468557358, -0.02089458703994751, -0.021305469796061516, 0.040677376091480255, 0.014332550577819347, -0.020290348678827286, 0.002093688352033496, 0.0029501973185688257, 0.015976080670952797, -0.0029728561639785767, 0.010918600484728813, 0.026610689237713814, -0.00865270383656025, 0.010127047076821327, 0.001732655568048358, -0.01147450041025877, 0.00032383439247496426, -0.024532105773687363, 0.01455007679760456, -0.02905181422829628, 0.0029909834265708923, -0.018429292365908623, -0.012447324581444263, -0.00408465601503849, -0.016918694600462914, 0.022501863539218903, 0.02085833251476288, -0.007546945940703154, -0.015686046332120895, 0.019831126555800438, 0.0347316637635231, -0.01963776908814907, 0.018284274265170097, -0.0037160704378038645, 0.021776776760816574, 0.013087818399071693, -0.013087818399071693, -0.008374753408133984, 0.0011835532495751977, -0.019734447821974754, -0.026973232626914978, 0.0053535583429038525, 0.005344494711607695, -0.010266021825373173, 0.0016873376443982124, 0.02089458703994751, 0.0087977210059762, 0.0027643938083201647, 0.035021696239709854, -0.006290128920227289, 0.02704574167728424, 0.010882345959544182, 0.021619673818349838, 0.00010489212581887841, -0.0015128635568544269, 0.005377727560698986, 0.005377727560698986, -0.03371654078364372, -0.001304401084780693, 
-0.0020483704283833504, -0.0030060894787311554, 0.010543972253799438, 0.019819041714072227, 0.022018471732735634, -0.016205692663788795, -0.007516734302043915, 0.006797689478844404, -0.013824990019202232, 0.025088006630539894, 0.00547138461843133, 0.013571209274232388, 0.005275007337331772, -0.018767666071653366, 0.020834162831306458, -0.005528787616640329, 0.008453304879367352, 0.007371716666966677, -0.016157353296875954, 0.0029577503446489573, 0.02137797884643078, 0.0007096032495610416, 0.003392802318558097, -0.0020166479516774416, -0.026683198288083076, 0.006069581490010023, 0.0056345295161008835, 0.015226825140416622, 0.03485250845551491, -0.01654406636953354, -0.014670925214886665, -0.018791835755109787, 0.004030274692922831, 0.015601453371345997, -0.013559124432504177, 0.000312882533762604, -0.02627231553196907, -0.02443542890250683, 0.014066685922443867, 0.011788704432547092, -0.036544378846883774, -0.015951910987496376, -0.03523922339081764, -0.004975908901542425, -0.00611187843605876, -0.030187783762812614, 0.02470129355788231, -0.04862916097044945, 0.011879339814186096, 0.01057418342679739, 0.016737421974539757, -0.0003593334404285997, 0.003948702476918697, 0.0013648250605911016, -0.007963870652019978, 0.010278106667101383, 0.031758807599544525, 0.015504774637520313, 0.008356626145541668, 0.013124072924256325, -0.002608802169561386, 0.007897404953837395, -0.032508064061403275, 0.008755424059927464, 0.004036317113786936, 0.004725149367004633, 0.027190757915377617, 0.013196581043303013, 0.012308349832892418, -0.010948811657726765, 0.011178422719240189, -0.0066526723094284534, 0.014501737430691719, 0.007855108007788658, -0.025643905624747276, -0.0030589604284614325, 0.0009939732262864709, -0.003450205083936453, 0.0038429605774581432, -0.016181522980332375, 0.0038610876072198153, 0.016761591657996178, 0.00993973296135664, 0.04007313400506973, -0.016701167449355125, 0.012592342682182789, -0.007172317709773779, 0.04253843054175377, -0.022090980783104897, 
-0.004416987765580416, -0.0005985743482597172, -0.005145095754414797, -0.03260473906993866, -0.02064080722630024, -0.009873266331851482, -0.007565073203295469, -0.008489559404551983, 0.011184465140104294, 0.009655740112066269, -0.026103127747774124, 0.0022115150932222605, 0.007806769106537104, 0.026078958064317703, -0.01757127232849598, -0.01858639344573021, 0.0007239539409056306, -0.017015373334288597, 0.009124009869992733, -0.026610689237713814, -0.02781916782259941, -0.03536007180809975, -0.009317366406321526, 0.010368742980062962, -0.01008475013077259, 0.02398829162120819, -0.012628596276044846, -0.008743339218199253, -0.007075639441609383, 0.02293691597878933, 0.01427212730050087, 0.008368710987269878, -0.0034381202422082424, 0.020604552701115608, -0.008695000782608986, -0.015867318958044052, 0.027384115383028984, 0.0004947207635268569, -0.01915437914431095, 0.009492595680058002, 0.010598353110253811, -0.010634607635438442, 0.0006914761033840477, 0.015057637356221676, 0.014997214078903198, -0.02296108566224575, -0.00031930257682688534, 0.018961021676659584, -0.029221002012491226, -0.0023081933613866568, -0.017269153147935867, 0.001532501308247447, -0.0027643938083201647, 0.031033718958497047, -0.025861432775855064, 0.015105976723134518, -0.00024131797545123845, 0.00028210412710905075, -0.009450298734009266, 0.027915844693779945, -0.02087041735649109, -0.0017220813315361738, -0.015383927151560783, 0.007824895903468132, -0.0025362935848534107, -0.02064080722630024, -0.010090792551636696, 0.0050725871697068214, -0.013051563873887062, 0.02041119523346424, -0.005284070502966642, 0.0017296343576163054, 0.024797972291707993, -0.01276152953505516, -0.024797972291707993, -0.014900535345077515, 0.009655740112066269, 0.023335713893175125, -0.0216921828687191, -0.012966970913112164, -0.005658698733896017, 0.018042579293251038, -0.014356720261275768, 0.00020166479225736111, -0.026562349870800972, -0.013559124432504177, 0.009855139069259167, 0.005788610316812992, 
-0.01966193877160549, -0.013704142533242702, 0.0034290568437427282, 0.0030785980634391308, -0.0035861588548868895, -0.021196706220507622, -0.023867443203926086, 0.0036435616202652454, -0.007758429739624262, -0.018743496388196945, -0.013957922346889973, -0.02396412193775177, 0.006640587467700243, 0.026054788380861282, -0.004927569534629583, -0.016169438138604164, 0.003836918156594038, 0.011099872179329395, -0.01108174491673708, 0.046961460262537, -0.0028822203166782856, -0.010284149087965488, -0.021535079926252365, 0.00752277672290802, 0.0024441471323370934, -0.02723909728229046, 0.022296421229839325, 0.0020483704283833504, -0.015915656462311745, 0.0008723701466806233, 0.0004769712104462087, -0.010489590466022491, -0.0025408254005014896, -0.009220688603818417, 0.01300322450697422, 0.001314220018684864, 0.003398844739422202, 0.0045771109871566296, -0.011788704432547092, 0.024375004693865776, -0.00031911375117488205, -0.0035529257729649544, 0.014429229311645031, -0.04181334376335144, 0.0017885476117953658, 0.0143929747864604, -0.0001168824965134263, 0.001171468524262309, -0.03105788864195347, -0.005562020931392908, -0.03881631791591644, 0.01679784618318081, 0.0037070068065077066, -0.005154159385710955, 0.019045615568757057, 0.0019622663967311382, -0.012966970913112164, 0.0077221752144396305, 0.0077765570022165775, 0.007957828231155872, 0.0007575647323392332, 0.028447575867176056, 0.014320465736091137, 0.023843273520469666, -0.003954744897782803, -0.000799861503764987, -0.029148492962121964, -0.02342030592262745, -0.015673961490392685, -0.008229736238718033, -0.013027394190430641, 0.015516859479248524, 0.004299161024391651, -0.011087787337601185, -0.018477631732821465, 0.0007194221252575517, -0.02192179299890995, -0.021510910242795944, -0.03509420529007912, 0.00720857223495841, 0.013293259777128696, -0.00321757304482162, -0.024544190615415573, 0.010133089497685432, 0.004592217039316893, 0.014719263650476933, -0.002569526666775346, -0.018187595531344414, 
0.0068943677470088005, -0.007559030782431364, 0.004634513519704342, -0.026320654898881912, -0.03108205832540989, -0.012411070987582207, 0.028133371844887733, -0.007818853482604027, 0.022526033222675323, -0.008247863501310349, -0.003432078054174781, 0.010731286369264126, -0.005827886052429676, 0.016386963427066803, 0.010229767300188541, 0.018513884395360947, 0.010290191508829594, -0.03110622800886631, -0.0167857613414526, 0.004604301881045103, -0.004087677225470543, -0.0014675456332042813, 0.0018927789060398936, 0.0021012413781136274, -0.004924548324197531, -0.0037916002329438925, -0.01363163348287344, -0.0043988605029881, -0.004830891266465187, -0.0007681389106437564, -0.006507654674351215, -0.005338452290743589, -0.023384051397442818, -0.00017315226432401687, 0.0013399001909419894, 0.0014033452607691288, 0.008495601825416088, 0.006483485456556082, 0.0134987011551857, -0.005655677989125252, 0.01737791672348976, -0.009957860223948956, -0.006259916815906763, -0.011250931769609451, 0.009015247225761414, -0.008966907858848572, 0.014960959553718567, -0.008966907858848572, -0.020979180932044983, -0.009698037058115005, -0.0181150883436203, 0.0026163551956415176, -0.03927553817629814, 0.008078676648437977, -0.026876553893089294, -0.005287091713398695, -0.010912558063864708, -0.0073052505031228065, 0.015383927151560783, -0.02702157199382782, 0.001679784618318081, 0.028399236500263214, -0.017438339069485664, -0.012701105326414108, -0.0166165754199028, -0.0071964873932302, -0.0006869442877359688, 0.009752418845891953, 0.24652954936027527, -0.023553239181637764, -0.012314392253756523, 0.013232835568487644, 0.003480417188256979, 0.026973232626914978, 0.01836886815726757, -0.014912620186805725, -0.004429072607308626, 0.0058248648419976234, 0.005755377002060413, 0.00758924288675189, -0.003046875586733222, 0.00510279880836606, 0.002693395595997572, -0.011420118622481823, -0.0173054076731205, -0.023529069498181343, -0.012114993296563625, -0.034030746668577194, 
-0.0025589524302631617, 0.004120910540223122, -0.011571178212761879, -0.0013769097859039903, 0.03084036335349083, 0.012205629609525204, -0.00674935057759285, 0.030719514936208725, 0.019722362980246544, -0.0020075843203812838, -0.004401881713420153, -0.011637644842267036, -0.012024357914924622, -0.017970070242881775, 0.01161951757967472, -0.01106966007500887, -0.010646692477166653, -0.011492627672851086, 0.011752449907362461, 0.0014176958939060569, -0.012447324581444263, -0.00993973296135664, -0.018683072179555893, -0.007123978808522224, 0.021365894004702568, 0.03190382197499275, -0.01455007679760456, 0.004105804488062859, 0.013051563873887062, 0.015963995829224586, -0.020205754786729813, -0.014936789870262146, 0.011389906518161297, 0.03618183732032776, 0.013897499069571495, 0.0022507905960083008, 0.02271938882768154, 0.009456341154873371, -0.01353495568037033, -0.003540840931236744, 0.0033202937338501215, 0.003003068268299103, 0.004432093817740679, -0.00472212815657258, -0.0021163474302738905, 0.02651401050388813, -0.01884017512202263, -0.01004849560558796, 0.0030242165084928274, -0.00905754417181015, 0.006888325326144695, -0.016145268455147743, 0.0022175575140863657, -0.008489559404551983, -0.015746470540761948, -0.017438339069485664, 0.03282226622104645, 0.02142631821334362, 0.03260473906993866, 0.004704001359641552, 0.004290097393095493, 0.002119368640705943, -0.01631445437669754, -0.01072524394840002, -0.018803920596837997, -0.04058069735765457, 0.01934773474931717, 0.006489527877420187, 0.021583419293165207, -0.0010007709497585893, 0.002776478650048375, -0.007105851545929909, 0.014078770764172077, -0.018767666071653366, 0.016725337132811546, 0.0010815879795700312, -0.004111846908926964, 0.012157290242612362, 0.014755518175661564, 0.0003563122299965471, -0.008725211955606937, -0.04094323888421059, 0.00851372815668583, -0.008695000782608986, 0.006888325326144695, -0.0045015811920166016, -0.008338498882949352, -0.01685827039182186, 0.004700980149209499, 
-0.025547228753566742, -0.013740397058427334, -0.01212103571742773, 0.012253968045115471, 0.014211703091859818, -0.006616417784243822, -0.0021541123278439045, -0.0003089172241743654, -0.014574246481060982, 0.030429480597376823, -0.028350897133350372, -0.009818884544074535, -0.01706371083855629, -0.0027009486220777035, -0.006773520261049271, 0.004459284245967865, -0.01964985392987728, -0.012471494264900684, 0.007679878734052181, -0.015649791806936264, -0.03185548260807991, 0.018924767151474953, 0.013921668753027916, 0.012797784060239792, -0.022828152403235435, 0.0014033452607691288, 0.01285820733755827, 0.0008814337779767811, 0.005616402253508568, -0.0077221752144396305, 0.01274944469332695, -0.010054538026452065, -0.004888294264674187, 0.03144460171461105, 0.003374675288796425, -8.479410098516382e-06, -0.015081807039678097, 0.03596431016921997, -0.013607463799417019, -0.016894524917006493, -0.005918521899729967, -0.009111925028264523, -0.006936664693057537, -0.017426254227757454, -0.014368805103003979, 0.02876177988946438, -0.02093084156513214, -0.023287374526262283, -0.021329639479517937, -0.011589305475354195, 0.015673961490392685, -0.012507748790085316, 0.0025725478772073984, 0.031154567375779152, 0.007111893966794014, -0.009782630950212479, -0.009655740112066269, -0.15130145847797394, 0.022248083725571632, 0.021305469796061516, 0.0012356688966974616, 0.007752387318760157, 0.009172349236905575, 0.014755518175661564, -0.02018158510327339, 0.011009235866367817, 0.013994176872074604, 0.01236877404153347, -0.006229704711586237, -0.028350897133350372, -0.02425415627658367, 0.013909583911299706, -0.03816374018788338, -0.017003288492560387, 0.023577408865094185, 0.023311544209718704, 0.01288237702101469, -0.0012122546322643757, 0.0031782975420355797, 0.01516640093177557, -0.02445959858596325, 0.0027794998604804277, 0.002956239739432931, 0.0023550218902528286, 0.0016012334963306785, -0.029680224135518074, 0.0004807477234862745, -0.02726326696574688, 
-0.0010634608333930373, 0.01042916625738144, 0.008398923091590405, 0.023359883576631546, 0.008477474562823772, -0.005936649162322283, 0.0007715377723798156, 0.008876272477209568, -0.0005615646950900555, 0.03081619367003441, 0.023094017058610916, 0.0007073373417370021, -0.007450267672538757, -0.008840017952024937, 0.00714210607111454, 0.045511286705732346, 0.0014222277095541358, -0.010779624804854393, -0.007794684264808893, 0.0015528944786638021, -0.019734447821974754, 0.01212103571742773, 0.005114883650094271, 0.030139444395899773, -0.0032779970206320286, -0.027142418548464775, 0.028713440522551537, 0.0018580352189019322, -0.016882440075278282, -0.005994051694869995, -0.014598416164517403, 0.006453273352235556, -0.01635070890188217, -0.015541029162704945, -0.0063868071883916855, -0.010634607635438442, -0.001180532039143145, -0.003492501797154546, 0.018816005438566208, -0.010477505624294281, -0.006072602700442076, 0.013571209274232388, 0.0016586362617090344, -0.00241695623844862, 0.021365894004702568, -0.03755950182676315, 0.02471337839961052, -0.004190397914499044, -0.026707367971539497, 0.0013451871927827597, 0.026344824582338333, -0.0029864516109228134, 0.014223787933588028, -0.011244889348745346, 0.02622397616505623, 0.005063523538410664, -0.019057700410485268, 0.009160264395177364, 8.133624214679003e-05, 0.0383087582886219, -0.02600644901394844, -0.026103127747774124, -0.011649729683995247, 0.0037402398884296417, 0.0087977210059762, 0.011057575233280659, 0.017776712775230408, 0.0029955152422189713, -0.015263078734278679, 0.013136157765984535, -0.0071964873932302, -0.009879308752715588, 0.017945900559425354, 0.033499013632535934, 0.020205754786729813, 0.0398072712123394, 0.003764409339055419, 0.027359945699572563, 0.011794746853411198, -0.02014533057808876, 0.016217777505517006, 0.016737421974539757, 0.0027296501211822033, 0.014151278883218765, 0.018501801416277885, 0.02239309996366501, -0.023081932216882706, 0.030477819964289665, 0.023287374526262283, 
0.041426632553339005, -0.0033686328679323196, -0.02474963292479515, 0.016918694600462914, 0.00027341817622072995, 0.0006065049674361944, -0.07850274443626404, -0.02648984082043171, 0.013438276946544647, 0.01300322450697422, 0.00011716573499143124, 0.04198253154754639, -0.014888450503349304, 0.012211672030389309, -0.0134987011551857, 0.025329701602458954, -0.028157541528344154, -0.015468520112335682, -0.012054570019245148, -0.0033686328679323196, 0.027625810354948044, -0.019939890131354332, -0.015250993892550468, -0.011734322644770145, -0.037825364619493484, 0.005978945642709732, -0.012241884134709835, 0.011051532812416553, -0.015226825140416622, -0.004093719646334648, 0.017643781378865242, -0.011969976127147675, -0.024036630988121033, 0.029124323278665543, 0.011601390317082405, -0.030139444395899773, 0.002438104711472988, -0.012308349832892418, -0.010664819739758968, -0.04328768700361252, -0.009510722942650318, 0.006350552663207054, 0.002830859972164035, 0.007287123240530491, 0.03729363530874252, -0.021293384954333305, 0.001756825135089457, 0.008562067523598671, 0.000543059897609055, -0.029196832329034805, 0.01452590711414814, -0.0029245170298963785, -0.005969882011413574, 0.004719107411801815, 0.016628660261631012, -0.012991140596568584, -0.021595504134893417, -3.08020316879265e-05, -0.014441314153373241, -0.006773520261049271, 0.029873579740524292, -0.024362919852137566, 0.013220750726759434, 0.011027363128960133, -0.02982524037361145, 0.013184496201574802, -0.0016948905540630221, 0.0020347749814391136, 0.007873235270380974, 0.0030121318995952606, -0.01527516357600689, -0.010827964171767235, -0.0027976268902420998, -0.014211703091859818, 0.014646755531430244, -0.02344447560608387, 0.010912558063864708, 0.01833261363208294, -0.019287310540676117, 0.0024350835010409355, -0.029631884768605232, -0.005794652737677097, -0.022078895941376686, -0.008290160447359085, 0.021003350615501404, -0.0026752685662359, -0.02545055001974106, 0.008435177616775036, 
-0.028616761788725853, -0.025426380336284637, 0.02953520603477955, 0.019674023613333702, -0.000675992458127439, 0.019565261900424957, 0.0066889263689517975, -0.005202498286962509, -0.0042206100188195705, 0.01148658525198698, 0.0003676417109090835, -0.03618183732032776, -0.004640555940568447, 0.0004898112965747714, 0.025692244991660118, -0.013027394190430641, 0.023903697729110718, 0.01687035523355007, -0.018948936834931374, -0.017015373334288597, -0.0480007529258728, 0.01300322450697422, 0.0016722315922379494, -0.009347578510642052, 0.028109202161431313, -0.009359663352370262, -0.009226731024682522, 0.00031137195765040815, 0.008489559404551983, -0.021341724321246147, -0.005764440633356571, 0.003148085670545697, 0.014973044395446777, -0.003359569236636162, -0.008465389721095562, -0.0211725365370512, 0.03929970785975456, 0.008060549385845661, -0.006580163724720478, -0.01285820733755827, 0.008960865437984467, 0.03833292797207832, 0.013269090093672276, 0.0006978961173444986, 0.00094714475562796, -0.012314392253756523, -0.024157477542757988, 0.01810300350189209, -0.03180714324116707, -0.01711205020546913, -0.004039338324218988, -0.02648984082043171, 0.013824990019202232, 0.009365705773234367, -0.013039479032158852, -0.06303422152996063, 0.004057465586811304, 0.02320278063416481, 0.009933690540492535, 0.0005457033985294402, -0.0008738807518966496, -0.028616761788725853, 0.017136219888925552, 0.0017915688222274184, -0.0047281705774366856, 0.001584616955369711, -0.027577470988035202, 0.007051470223814249, -0.008942738175392151, 0.00300004705786705, 0.004211546387523413, 0.013812905177474022, -0.004096740856766701, -0.017414169386029243, -0.0216921828687191, -0.019516922533512115, -0.0026964168064296246, 0.005691932048648596, -0.0070091732777655125, 0.004519708454608917, 0.05225459486246109, 0.020205754786729813, 0.003812748473137617, -0.01174036506563425, 0.006991046015173197, -0.008562067523598671, -0.012133120559155941, 0.014634670689702034, -0.0015725322300568223, 
-0.03562593460083008, -0.02494298852980137, -0.016266116872429848, 0.021112114191055298, 0.02803669311106205, 0.010550014674663544, -0.013667888008058071, 0.010175386443734169, 0.0059759244322776794, 0.00213145324960351, 0.024870479479432106, 0.033499013632535934, 0.01222375687211752, -0.015963995829224586, 0.011015278287231922, 0.003323314944282174, 0.02367408759891987, -0.01966193877160549, 0.01502138376235962, -0.007371716666966677, 0.02018158510327339, -0.010018283501267433, -0.002784031443297863, 0.0017145284218713641, 0.02651401050388813, 0.012556088156998158, 0.018961021676659584, -0.01044125109910965, -0.022284336388111115, -0.01682201586663723, 0.035045865923166275, -0.0004354297707322985, -0.004063508007675409, -0.00814514234662056, -0.014670925214886665, 0.007625496946275234, -0.0021828135941177607, -0.04200670123100281, -0.02166801318526268, -0.007468394935131073, 0.014767603017389774, 0.013160327449440956, 0.022260166704654694, 0.00958323199301958, -0.00179912184830755, 0.0019093954470008612, -0.004870167002081871, 0.004329373128712177, 0.005392833612859249, -0.017752544954419136, 0.02368617244064808, 0.02118462137877941, 0.0021450486965477467, 0.02704574167728424, -0.02448376826941967, 0.0013059116899967194, 0.024024546146392822, 0.0070091732777655125, -0.005888309795409441, -0.004120910540223122, -0.001918459078297019, 0.007679878734052181, -0.015988165512681007, -0.005287091713398695, -0.008803763426840305, -0.018719326704740524, -0.011734322644770145, 0.016205692663788795, 0.02087041735649109, -0.016459472477436066, 0.0423450730741024, -0.02367408759891987, -0.018755581229925156, 0.00536262197420001, -0.006265959236770868, 0.019516922533512115, 0.0007877767202444375, 0.025692244991660118, -0.005543893668800592, -0.008398923091590405, 0.010610437951982021, 0.0017719310708343983, -0.01732957735657692, -0.012398986145853996, -0.007268995977938175, -0.0018837152747437358, -0.014223787933588028, -0.0021586441434919834, -0.002033264609053731, 
-0.00017768405086826533, 0.007401928771287203, 0.032483894377946854, 0.016447387635707855, 0.015746470540761948, -0.028205880895256996, -0.023057762533426285, 0.02067706175148487, 0.009269027039408684, -0.014622585847973824, -0.011293228715658188, 0.009099840186536312, -0.03154128044843674, -0.046961460262537, -0.005015184171497822, 0.003522713901475072, -0.008761466480791569, -0.006045411806553602, -0.040870729833841324, -0.010809836909174919, 0.03613349795341492, -0.008997119963169098, -0.0004550675512291491, -0.032991454005241394, -0.02419373206794262, 0.03192799165844917, -0.005120926070958376, -0.01864681765437126, -0.004456263035535812, -0.014743433333933353]\n"
     ]
    }
   ],
   "source": [
    "# Sanity check: request the embedding of one sample sentence and print it.\n",
    "model = \"text-embedding-ada-002\"\n",
    "text = \"I love jazz\"\n",
    "response = openai.Embedding.create(model=model, input=text)\n",
    "# The vector is stored under the first entry of the response's 'data' list.\n",
    "embeddings = response['data'][0]['embedding']\n",
    "print(embeddings)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "504fa4c8",
   "metadata": {},
   "outputs": [],
   "source": [
    "def get_sentence_vector(text, model):\n",
    "    \"\"\"Return the OpenAI embedding of `text`.\n",
    "\n",
    "    Args:\n",
    "        text: The sentence to embed.\n",
    "        model: Name of the OpenAI embedding model, e.g. \"text-embedding-ada-002\".\n",
    "\n",
    "    Returns:\n",
    "        The 'embedding' value of the first entry in the response's 'data' list.\n",
    "    \"\"\"\n",
    "    # Use the caller-supplied text as-is; it must not be replaced by a fixed\n",
    "    # sample sentence, otherwise every input maps to the same vector.\n",
    "    response = openai.Embedding.create(\n",
    "        input=text,\n",
    "        model=model\n",
    "    )\n",
    "    embeddings = response['data'][0]['embedding']\n",
    "    return embeddings"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "e938f00f",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Found cached dataset rotten_tomatoes (/home/zhenya/.cache/huggingface/datasets/rotten_tomatoes/default/1.0.0/40d411e45a6ce3484deed7cc15b82a53dad9a72aafd9f86f8f227134bec5ca46)\n",
      "Found cached dataset rotten_tomatoes (/home/zhenya/.cache/huggingface/datasets/rotten_tomatoes/default/1.0.0/40d411e45a6ce3484deed7cc15b82a53dad9a72aafd9f86f8f227134bec5ca46)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "OpenAI embeddings: 704.3250799179077 s\n",
      "              precision    recall  f1-score   support\n",
      "\n",
      "           0       0.49      0.82      0.62       160\n",
      "           1       0.47      0.16      0.23       160\n",
      "\n",
      "    accuracy                           0.49       320\n",
      "   macro avg       0.48      0.49      0.43       320\n",
      "weighted avg       0.48      0.49      0.43       320\n",
      "\n"
     ]
    }
   ],
   "source": [
    "import time\n",
    "\n",
    "\n",
    "def vectorize(x):\n",
    "    \"\"\"Embed one sentence using the `model` chosen in an earlier cell.\"\"\"\n",
    "    return get_sentence_vector(x, model)\n",
    "\n",
    "\n",
    "# Load the train/test frames, then time the create_train_test_data call.\n",
    "train_df, test_df = load_train_test_dataset_pd()\n",
    "start = time.time()\n",
    "X_train, X_test, y_train, y_test = create_train_test_data(train_df, test_df, vectorize)\n",
    "print(f\"OpenAI embeddings: {time.time() - start} s\")\n",
    "# Fit the classifier on the training data and evaluate it with test_classifier.\n",
    "clf = train_classifier(X_train, y_train)\n",
    "test_classifier(test_df, clf)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "609d0f27",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.12"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
